--- /dev/null
+ * Added filtering of log series using regular expressions. (issue #100)
// should be used with the goleri module.
//
// Source class: SiriGrammar
-// Created at: 2018-06-14 16:27:16
+// Created at: 2018-06-22 15:10:04
import (
"regexp"
string,
rInteger,
rFloat,
+ rRegex,
kNan,
kInf,
kNinf,
string,
r_integer,
r_float,
+ r_regex,
k_nan,
k_inf,
k_ninf,
#include <slist/slist.h>
#include <cexpr/cexpr.h>
#include <qpack/qpack.h>
+#include <pcre2.h>
typedef struct siridb_point_s siridb_point_t;
typedef struct siridb_points_s siridb_points_t;
uint64_t limit;
uint64_t offset;
double timespan; // used for derivative
+ pcre2_code * regex; \
+ pcre2_match_data * match_data;
qp_via_t filter_via;
} siridb_aggr_t;
* should be used with the libcleri module.
*
* Source class: SiriGrammar
- * Created at: 2018-06-14 16:27:16
+ * Created at: 2018-06-22 15:10:04
*/
#ifndef CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_
#define CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_
#include <siri/db/group.h>
#include <siri/db/series.h>
#include <siri/db/user.h>
+#include <pcre2.h>
#define QUERIES_IGNORE_DROP_THRESHOLD 1
#define QUERIES_SKIP_GET_POINTS 2
#define SIRIDB_VERSION_MAJOR 2
#define SIRIDB_VERSION_MINOR 0
-#define SIRIDB_VERSION_PATCH 28
+#define SIRIDB_VERSION_PATCH 29
#define SIRIDB_STRINGIFY(num) #num
#define SIRIDB_VERSION_STR(major,minor,patch) \
#include <siri/db/median.h>
#include <siri/db/variance.h>
#include <siri/grammar/grammar.h>
+#include <siri/db/re.h>
#include <slist/slist.h>
#include <stddef.h>
#include <strextra/strextra.h>
static AGGR_cb AGGREGATES[F_OFFSET];
static siridb_aggr_t * AGGREGATE_new(uint32_t gid);
+static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val);
static void AGGREGATE_free(siridb_aggr_t * aggr);
static int AGGREGATE_init_filter(
siridb_aggr_t * aggr,
aggr->limit = 0;
aggr->offset = 0;
aggr->timespan = 1.0;
- aggr->filter_tp = TP_INT; /* when string we must
- * malloc/free * aggr->filter_via.raw */
+ aggr->regex = NULL;
+ aggr->match_data = NULL;
+ aggr->filter_via.raw = NULL;
+ aggr->filter_tp = TP_INT; /* when string we must cleanup more */
return aggr;
}
if (aggr->filter_tp == TP_STRING)
{
free(aggr->filter_via.raw);
+ pcre2_code_free(aggr->regex);
+ pcre2_match_data_free(aggr->match_data);
}
free(aggr);
}
(char *) aggr->filter_via.raw, node->str, node->len);
return 0;
+ case CLERI_GID_R_REGEX:
+ if (aggr->filter_opr != CEXPR_EQ && aggr->filter_opr != CEXPR_NE)
+ {
+ sprintf(err_msg,
+ "Regular expressions can only be used with 'equal' (==) "
+ "or 'not equal' (!=) operator.");
+ return -1;
+ }
+ aggr->filter_tp = TP_STRING;
+ /* extract and compile regular expression */
+ if (siridb_re_compile(
+ &aggr->regex,
+ &aggr->match_data,
+ node->str,
+ node->len,
+ err_msg))
+ {
+ return -1; /* error_msg is set */
+ }
+ return 0;
+
default:
assert (0);
break;
return points;
}
+/*
+ * Test a string value against the compiled filter expression of `aggr`.
+ *
+ * Returns non-zero when the value passes the filter:
+ *   - aggr->filter_opr == CEXPR_EQ : the expression matches `val`
+ *   - any other operator           : the expression does not match `val`
+ *
+ * Every negative pcre2_match() result is treated as "no match", not only
+ * PCRE2_ERROR_NOMATCH; a matching error therefore passes a "not equal"
+ * filter and fails an "equal" filter.
+ *
+ * Expects aggr->regex and aggr->match_data to be set (presumably by
+ * siridb_re_compile() when the filter was initialized) and `val` to be a
+ * null terminated string.
+ */
+static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val)
+{
+    int ret = pcre2_match(
+            aggr->regex,
+            (PCRE2_SPTR8) val,
+            strlen(val),
+            0,                  /* start offset within val */
+            0,                  /* options */
+            aggr->match_data,
+            NULL);              /* match context, NULL uses the defaults */
+
+    return aggr->filter_opr == CEXPR_EQ ? ret >= 0 : ret < 0;
+}
+
static siridb_points_t * AGGREGATE_filter(
siridb_points_t * source,
siridb_aggr_t * aggr,
siridb_points_t * points = siridb_points_new(source->len, source->tp);
-
if (points == NULL)
{
sprintf(err_msg, "Memory allocation error.");
i < source->len;
i++, spt++)
{
- if (cexpr_str_cmp(aggr->filter_opr, spt->val.str, value.str))
+ if (value.str != NULL // NULL is a regular expression
+ ? cexpr_str_cmp(
+ aggr->filter_opr,
+ spt->val.str, value.str)
+ : AGGREGATE_regex_cmp(aggr, spt->val.str))
{
dpt->ts = spt->ts;
dpt->val.str = strdup(spt->val.str);
* should be used with the libcleri module.
*
* Source class: SiriGrammar
- * Created at: 2018-06-14 16:27:16
+ * Created at: 2018-06-22 15:10:04
*/
#include "siri/grammar/grammar.h"
cleri_choice(
CLERI_NONE,
CLERI_MOST_GREEDY,
- 6,
+ 7,
string,
r_integer,
r_float,
+ r_regex,
k_nan,
k_inf,
k_ninf
import random
import time
import math
+import re
from testing import Client
from testing import default_test_setup
from testing import gen_data
[1447253549, 538],
[1447254748, 537]]})
+ self.assertEqual(
+ await self.client0.query(
+ 'select filter(/l.*/) from * where type == string'),
+ {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]})
+
+ self.assertEqual(
+ await self.client0.query(
+ 'select filter(==/l.*/) from * where type == string'),
+ {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]})
+
+ self.assertEqual(
+ await self.client0.query(
+ 'select filter(!=/l.*/) from * where type == string'),
+ {'log': [p for p in DATA['log'] if not re.match('l.*', p[1])]})
+
self.assertEqual(
await self.client0.query('select limit(300, mean) from "aggr"'),
{'aggr': DATA['aggr']})
await self.client0.query('select difference() from "one"'),
{'one': []})
+ with self.assertRaisesRegexp(
+ QueryError,
+ 'Regular expressions can only be used with.*'):
+ await self.client0.query('select filter(~//) from "log"')
+
+ with self.assertRaisesRegexp(
+ QueryError,
+ 'Cannot use a string filter on number type.'):
+ await self.client0.query('select filter(//) from "aggr"')
+
with self.assertRaisesRegexp(
QueryError,
'Cannot use mean\(\) on string type\.'):